import pandas as pd
import numpy as np
url = "https://docs.google.com/spreadsheets/d/1mS6khEB6m8cPNenNvY9Tg6bJ6YkmcvCI/export?format=csv&gid=1283335856"
df = pd.read_csv(url)
df.head()
| TERM | SUBJECT | NBR | COURSE NAME | SECTION | PROF | TOTAL | A+ | A | A- | ... | C | C- | D | F | W | INC/NA | AVG GPA | join the best club on campus | Unnamed: 22 | https://discord.gg/vxCqd4hdak | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | F2022 | ACCT | 100 | Fin & Mgr Acct | 1 | HO, V | 28 | 0 | 11 | 2 | ... | 3 | 0 | 0 | 0 | 5 | 0 | 3.383 | NaN | NaN | NaN |
| 1 | F2022 | ACCT | 101 | Intro Thry & Prac of Acct I | 11 | CHAN, J | 29 | 0 | 13 | 2 | ... | 0 | 1 | 0 | 0 | 3 | 0 | 3.464 | NaN | NaN | NaN |
| 2 | F2022 | ACCT | 101 | Intro Thry & Prac of Acct I | 5 | RUTHIZER, S | 50 | 2 | 9 | 14 | ... | 1 | 0 | 3 | 2 | 3 | 0 | 3.176 | NaN | NaN | NaN |
| 3 | F2022 | ACCT | 101 | Intro Thry & Prac of Acct I | 2 | GRUZA, M | 45 | 1 | 2 | 3 | ... | 0 | 0 | 0 | 0 | 25 | 0 | 3.067 | NaN | NaN | NaN |
| 4 | F2022 | ACCT | 101 | Intro Thry & Prac of Acct I | 4 | FEISULLIN, A | 45 | 4 | 6 | 1 | ... | 9 | 1 | 1 | 0 | 4 | 1 | 2.908 | NaN | NaN | NaN |
5 rows × 24 columns
important_columns = ["SUBJECT", "NBR", "COURSE NAME", "PROF", "TOTAL", "A+", "A", "A-", "B+", "B", "B-", "C+", "C", "C-", "D", "F", "W", "AVG GPA"]
def gpa_letter_converter(gpa):
letter_grades = {
"A": 4.0,
"A-": (3.7, 3.8, 3.9),
"B+": (3.3, 3.4, 3.5, 3.6),
"B": (3.0, 3.1, 3.2),
"B-": (2.7, 2.8, 2.9),
"C+": (2.3, 2.4, 2.5, 2.6),
"C": (2.0, 2.1, 2.2),
"C-": (1.7, 1.8, 1.9),
"D": (1.0, 1.1, 1.2, 1.3, 1.4, 1.5, 1.6),
"F": (0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9)
}
for letter_grade, number_grade in letter_grades.items():
if isinstance(number_grade, float) and gpa == number_grade:
return letter_grade
elif isinstance(number_grade, tuple) and gpa in number_grade:
return letter_grade
return None
analysis_df = df[important_columns]
analysis_df.head()
| SUBJECT | NBR | COURSE NAME | PROF | TOTAL | A+ | A | A- | B+ | B | B- | C+ | C | C- | D | F | W | AVG GPA | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | ACCT | 100 | Fin & Mgr Acct | HO, V | 28 | 0 | 11 | 2 | 1 | 3 | 3 | 0 | 3 | 0 | 0 | 0 | 5 | 3.383 |
| 1 | ACCT | 101 | Intro Thry & Prac of Acct I | CHAN, J | 29 | 0 | 13 | 2 | 0 | 4 | 5 | 0 | 0 | 1 | 0 | 0 | 3 | 3.464 |
| 2 | ACCT | 101 | Intro Thry & Prac of Acct I | RUTHIZER, S | 50 | 2 | 9 | 14 | 6 | 4 | 5 | 0 | 1 | 0 | 3 | 2 | 3 | 3.176 |
| 3 | ACCT | 101 | Intro Thry & Prac of Acct I | GRUZA, M | 45 | 1 | 2 | 3 | 1 | 5 | 0 | 6 | 0 | 0 | 0 | 0 | 25 | 3.067 |
| 4 | ACCT | 101 | Intro Thry & Prac of Acct I | FEISULLIN, A | 45 | 4 | 6 | 1 | 7 | 5 | 0 | 6 | 9 | 1 | 1 | 0 | 4 | 2.908 |
analysis_df = analysis_df.dropna(subset = "PROF")
analysis_df = analysis_df[analysis_df["AVG GPA"] != 0]
analysis_df.shape
(2333, 18)
cs_df = analysis_df[analysis_df["SUBJECT"] == "CSCI"]
pd.set_option('display.max_rows', None)
cs_df
| SUBJECT | NBR | COURSE NAME | PROF | TOTAL | A+ | A | A- | B+ | B | B- | C+ | C | C- | D | F | W | AVG GPA | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 582 | CSCI | 12 | Intro Computers & Computation | WAXMAN, J | 58 | 0 | 2 | 7 | 5 | 14 | 6 | 2 | 2 | 1 | 3 | 5 | 10 | 2.594 |
| 585 | CSCI | 48 | Spreadsheet Programming | FRIED, M | 131 | 19 | 10 | 23 | 28 | 17 | 12 | 4 | 1 | 0 | 1 | 1 | 12 | 3.354 |
| 586 | CSCI | 48 | Spreadsheet Programming | JAGDEO, M | 94 | 7 | 14 | 15 | 9 | 11 | 6 | 1 | 1 | 0 | 7 | 7 | 14 | 2.945 |
| 596 | CSCI | 100 | Information and Intelligence | CASTRO, R | 40 | 0 | 8 | 4 | 5 | 5 | 3 | 0 | 4 | 2 | 0 | 3 | 6 | 2.876 |
| 597 | CSCI | 111 | Intro Algorithmic Problem Solv | SCHLEY, R | 236 | 10 | 30 | 14 | 8 | 22 | 8 | 17 | 25 | 8 | 13 | 22 | 49 | 2.494 |
| 598 | CSCI | 111 | Intro Algorithmic Problem Solv | CHYN, X | 239 | 0 | 19 | 10 | 8 | 39 | 14 | 11 | 8 | 0 | 23 | 26 | 75 | 2.269 |
| 599 | CSCI | 111 | Intro Algorithmic Problem Solv | CHYN, X | 240 | 0 | 21 | 11 | 7 | 28 | 22 | 7 | 13 | 0 | 23 | 26 | 68 | 2.255 |
| 619 | CSCI | 112 | Intro Algori Prob Solving Java | CONNOR, T | 13 | 6 | 2 | 1 | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 3.725 |
| 621 | CSCI | 211 | Object-Oriented Program in C++ | WAXMAN, J | 150 | 9 | 9 | 7 | 7 | 10 | 10 | 4 | 16 | 0 | 15 | 30 | 32 | 2.002 |
| 622 | CSCI | 211 | Object-Oriented Program in C++ | WAXMAN, J | 105 | 5 | 5 | 4 | 3 | 4 | 11 | 2 | 9 | 0 | 15 | 25 | 22 | 1.735 |
| 623 | CSCI | 211 | Object-Oriented Program in C++ | ALAYEV, Y | 16 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 2 | 11 | 1.400 |
| 635 | CSCI | 212 | Object-Oriented Prog in Java | TSE, C | 36 | 1 | 6 | 1 | 3 | 3 | 0 | 3 | 5 | 0 | 0 | 3 | 11 | 2.700 |
| 636 | CSCI | 212 | Object-Oriented Prog in Java | LORD, K | 161 | 5 | 6 | 11 | 11 | 13 | 13 | 10 | 12 | 9 | 9 | 21 | 39 | 2.220 |
| 637 | CSCI | 212 | Object-Oriented Prog in Java | LORD, K | 127 | 7 | 12 | 5 | 7 | 11 | 7 | 4 | 6 | 3 | 12 | 24 | 29 | 2.120 |
| 648 | CSCI | 220 | Discrete Structures | KAHROBAEI, D | 39 | 0 | 18 | 3 | 3 | 2 | 2 | 0 | 5 | 4 | 0 | 0 | 2 | 3.276 |
| 649 | CSCI | 220 | Discrete Structures | KAHROBAEI, D | 42 | 0 | 14 | 3 | 1 | 3 | 2 | 0 | 5 | 5 | 0 | 3 | 6 | 2.869 |
| 650 | CSCI | 220 | Discrete Structures | CHYN, X | 88 | 0 | 8 | 4 | 8 | 6 | 12 | 7 | 11 | 0 | 1 | 13 | 17 | 2.324 |
| 651 | CSCI | 220 | Discrete Structures | CHYN, X | 58 | 1 | 6 | 2 | 4 | 9 | 7 | 3 | 11 | 0 | 0 | 12 | 3 | 2.244 |
| 652 | CSCI | 220 | Discrete Structures | CESARETTI, P | 30 | 0 | 3 | 3 | 1 | 2 | 1 | 2 | 4 | 0 | 0 | 7 | 7 | 2.074 |
| 653 | CSCI | 240 | Computer Org & Assembly Lang | YEH, J | 45 | 2 | 7 | 3 | 4 | 7 | 7 | 3 | 3 | 3 | 0 | 2 | 4 | 2.883 |
| 654 | CSCI | 240 | Computer Org & Assembly Lang | YEH, J | 45 | 5 | 1 | 5 | 3 | 7 | 4 | 1 | 3 | 6 | 0 | 1 | 8 | 2.853 |
| 655 | CSCI | 240 | Computer Org & Assembly Lang | YEH, J | 45 | 0 | 4 | 3 | 4 | 5 | 5 | 7 | 2 | 4 | 0 | 4 | 7 | 2.518 |
| 656 | CSCI | 240 | Computer Org & Assembly Lang | SVITAK, J | 31 | 0 | 2 | 0 | 3 | 3 | 2 | 2 | 4 | 2 | 2 | 2 | 8 | 2.286 |
| 657 | CSCI | 240 | Computer Org & Assembly Lang | SVITAK, J | 26 | 0 | 2 | 3 | 1 | 3 | 0 | 1 | 4 | 0 | 4 | 2 | 6 | 2.285 |
| 658 | CSCI | 240 | Computer Org & Assembly Lang | SVITAK, J | 37 | 0 | 4 | 4 | 1 | 4 | 0 | 1 | 3 | 2 | 5 | 4 | 9 | 2.243 |
| 659 | CSCI | 313 | Data Structures | TAO, X | 39 | 9 | 6 | 2 | 8 | 5 | 3 | 0 | 0 | 0 | 0 | 4 | 2 | 3.159 |
| 660 | CSCI | 313 | Data Structures | TAO, X | 36 | 0 | 5 | 1 | 6 | 16 | 1 | 0 | 1 | 0 | 0 | 1 | 5 | 3.103 |
| 661 | CSCI | 313 | Data Structures | TAO, X | 24 | 2 | 2 | 2 | 1 | 6 | 1 | 0 | 0 | 0 | 0 | 6 | 4 | 2.370 |
| 662 | CSCI | 313 | Data Structures | TSAI, C | 10 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 6 | 2.325 |
| 663 | CSCI | 313 | Data Structures | STEINBERG, O | 31 | 3 | 2 | 0 | 4 | 1 | 3 | 3 | 3 | 3 | 6 | 2 | 1 | 2.277 |
| 664 | CSCI | 313 | Data Structures | SMITH-THOMPSON, A | 29 | 3 | 3 | 0 | 0 | 1 | 0 | 1 | 6 | 0 | 0 | 5 | 8 | 2.174 |
| 665 | CSCI | 313 | Data Structures | STEINBERG, O | 42 | 2 | 1 | 2 | 1 | 4 | 4 | 2 | 8 | 4 | 4 | 5 | 5 | 2.078 |
| 666 | CSCI | 313 | Data Structures | SMITH-THOMPSON, A | 25 | 0 | 2 | 0 | 0 | 1 | 1 | 1 | 6 | 0 | 0 | 3 | 11 | 2.000 |
| 667 | CSCI | 316 | Principles of Programming Lang | SMITH-THOMPSON, A | 35 | 0 | 5 | 0 | 2 | 9 | 6 | 4 | 6 | 0 | 0 | 0 | 1 | 2.844 |
| 668 | CSCI | 316 | Principles of Programming Lang | KONG, T | 35 | 3 | 6 | 1 | 3 | 3 | 3 | 1 | 4 | 0 | 1 | 4 | 5 | 2.690 |
| 669 | CSCI | 316 | Principles of Programming Lang | KONG, T | 35 | 1 | 1 | 3 | 3 | 4 | 3 | 1 | 3 | 0 | 1 | 5 | 9 | 2.336 |
| 670 | CSCI | 316 | Principles of Programming Lang | SVITAK, J | 28 | 0 | 0 | 1 | 0 | 5 | 1 | 1 | 3 | 2 | 7 | 2 | 6 | 1.823 |
| 671 | CSCI | 316 | Principles of Programming Lang | SVITAK, J | 30 | 0 | 0 | 0 | 2 | 1 | 1 | 2 | 4 | 2 | 6 | 3 | 9 | 1.633 |
| 672 | CSCI | 316 | Principles of Programming Lang | KONG, T | 61 | 0 | 5 | 0 | 2 | 4 | 3 | 3 | 4 | 0 | 2 | 23 | 13 | 1.383 |
| 673 | CSCI | 320 | Theory of Computation | OBRENIC, B | 86 | 2 | 11 | 2 | 7 | 11 | 10 | 7 | 12 | 0 | 0 | 15 | 9 | 2.371 |
| 674 | CSCI | 320 | Theory of Computation | BOKLAN, K | 37 | 1 | 2 | 0 | 0 | 0 | 0 | 0 | 2 | 0 | 0 | 2 | 30 | 2.286 |
| 675 | CSCI | 320 | Theory of Computation | OBRENIC, B | 109 | 0 | 6 | 0 | 4 | 19 | 9 | 6 | 19 | 0 | 1 | 24 | 21 | 1.947 |
| 676 | CSCI | 320 | Theory of Computation | BOKLAN, K | 28 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 3 | 0 | 0 | 4 | 19 | 1.411 |
| 677 | CSCI | 323 | Design & Analysis Algorithms | TAO, X | 30 | 7 | 5 | 7 | 3 | 3 | 1 | 0 | 2 | 0 | 0 | 0 | 2 | 3.554 |
| 678 | CSCI | 323 | Design & Analysis Algorithms | PHILLIPS, T | 18 | 1 | 0 | 4 | 1 | 0 | 1 | 1 | 3 | 0 | 0 | 1 | 6 | 2.758 |
| 679 | CSCI | 323 | Design & Analysis Algorithms | PHILLIPS, T | 25 | 2 | 2 | 1 | 0 | 2 | 2 | 0 | 1 | 0 | 0 | 4 | 11 | 2.364 |
| 680 | CSCI | 323 | Design & Analysis Algorithms | BROWN, T | 30 | 0 | 0 | 1 | 3 | 4 | 8 | 1 | 6 | 0 | 0 | 5 | 2 | 2.196 |
| 681 | CSCI | 323 | Design & Analysis Algorithms | GOSWAMI, M | 61 | 1 | 5 | 6 | 2 | 5 | 1 | 6 | 7 | 0 | 0 | 13 | 15 | 2.137 |
| 682 | CSCI | 331 | Database Systems | STEINBERG, O | 35 | 4 | 4 | 3 | 4 | 6 | 3 | 3 | 4 | 3 | 1 | 0 | 0 | 2.954 |
| 683 | CSCI | 331 | Database Systems | STEINBERG, O | 31 | 1 | 2 | 4 | 6 | 3 | 4 | 3 | 1 | 3 | 1 | 0 | 3 | 2.907 |
| 684 | CSCI | 331 | Database Systems | HELLER, P | 29 | 0 | 1 | 1 | 1 | 0 | 1 | 2 | 5 | 0 | 0 | 1 | 17 | 2.358 |
| 685 | CSCI | 331 | Database Systems | LEAVITT, D | 39 | 1 | 4 | 4 | 3 | 2 | 4 | 1 | 4 | 0 | 1 | 7 | 8 | 2.348 |
| 686 | CSCI | 331 | Database Systems | OBRENIC, B | 83 | 3 | 3 | 0 | 2 | 6 | 9 | 7 | 29 | 0 | 1 | 13 | 10 | 2.027 |
| 687 | CSCI | 334 | Data Mining and Warehousing | SY, B | 13 | 0 | 1 | 2 | 0 | 2 | 0 | 1 | 1 | 0 | 0 | 0 | 6 | 3.100 |
| 688 | CSCI | 335 | Information Org and Retrieval | GOLDBERG, R | 33 | 0 | 4 | 4 | 4 | 5 | 3 | 2 | 5 | 0 | 0 | 1 | 5 | 2.918 |
| 689 | CSCI | 340 | Operating Systems Principles | SVADLENKA, J | 33 | 2 | 3 | 1 | 1 | 3 | 3 | 2 | 1 | 1 | 0 | 0 | 14 | 3.082 |
| 690 | CSCI | 340 | Operating Systems Principles | SVADLENKA, J | 22 | 1 | 2 | 0 | 3 | 2 | 1 | 0 | 3 | 0 | 1 | 0 | 9 | 2.892 |
| 691 | CSCI | 340 | Operating Systems Principles | SVADLENKA, J | 37 | 1 | 3 | 3 | 2 | 1 | 5 | 4 | 1 | 1 | 1 | 2 | 13 | 2.671 |
| 692 | CSCI | 340 | Operating Systems Principles | SVADLENKA, J | 29 | 2 | 2 | 1 | 3 | 3 | 2 | 1 | 1 | 0 | 2 | 2 | 10 | 2.647 |
| 693 | CSCI | 340 | Operating Systems Principles | FLUTURE, S | 37 | 0 | 0 | 4 | 4 | 3 | 7 | 6 | 3 | 0 | 0 | 2 | 8 | 2.610 |
| 694 | CSCI | 340 | Operating Systems Principles | SVADLENKA, J | 33 | 2 | 0 | 1 | 0 | 2 | 3 | 3 | 2 | 2 | 4 | 1 | 13 | 2.205 |
| 695 | CSCI | 340 | Operating Systems Principles | FLUTURE, S | 37 | 0 | 1 | 2 | 0 | 0 | 4 | 5 | 7 | 0 | 0 | 3 | 13 | 2.168 |
| 696 | CSCI | 343 | Computer Architecture | RAHMAN, M | 34 | 2 | 2 | 6 | 3 | 9 | 5 | 5 | 1 | 0 | 0 | 1 | 0 | 3.003 |
| 697 | CSCI | 343 | Computer Architecture | FLUTURE, S | 31 | 2 | 4 | 0 | 0 | 2 | 1 | 2 | 2 | 0 | 0 | 2 | 16 | 2.753 |
| 698 | CSCI | 343 | Computer Architecture | FLUTURE, S | 70 | 1 | 9 | 2 | 3 | 1 | 4 | 5 | 2 | 0 | 0 | 5 | 38 | 2.706 |
| 699 | CSCI | 343 | Computer Architecture | SMITH-THOMPSON, A | 32 | 3 | 4 | 4 | 4 | 2 | 1 | 1 | 3 | 0 | 0 | 6 | 3 | 2.607 |
| 700 | CSCI | 343 | Computer Architecture | UPADHYAY, V | 32 | 0 | 2 | 2 | 1 | 1 | 0 | 2 | 10 | 0 | 0 | 1 | 13 | 2.437 |
| 701 | CSCI | 343 | Computer Architecture | SMITH-THOMPSON, A | 32 | 2 | 3 | 4 | 2 | 3 | 0 | 0 | 5 | 0 | 0 | 8 | 5 | 2.237 |
| 702 | CSCI | 343 | Computer Architecture | UPADHYAY, V | 32 | 1 | 0 | 1 | 1 | 1 | 0 | 4 | 8 | 0 | 0 | 9 | 7 | 1.568 |
| 703 | CSCI | 344 | Distributed Systems | FLUTURE, S | 22 | 1 | 1 | 0 | 0 | 1 | 2 | 2 | 3 | 0 | 0 | 1 | 10 | 2.455 |
| 704 | CSCI | 348 | Data Communications | RAHMAN, M | 26 | 0 | 2 | 5 | 3 | 6 | 3 | 1 | 4 | 0 | 0 | 1 | 1 | 2.912 |
| 705 | CSCI | 355 | Internet and Web Technologies | FRIED, M | 45 | 0 | 8 | 5 | 10 | 7 | 9 | 4 | 0 | 0 | 0 | 0 | 2 | 3.209 |
| 706 | CSCI | 355 | Internet and Web Technologies | LAW, R | 24 | 2 | 2 | 1 | 1 | 4 | 1 | 3 | 3 | 0 | 0 | 1 | 6 | 2.811 |
| 707 | CSCI | 355 | Internet and Web Technologies | LAW, R | 22 | 2 | 1 | 2 | 2 | 2 | 0 | 1 | 1 | 0 | 0 | 5 | 5 | 2.269 |
| 708 | CSCI | 355 | Internet and Web Technologies | LAW, R | 33 | 2 | 2 | 1 | 2 | 2 | 1 | 1 | 2 | 0 | 0 | 10 | 10 | 1.796 |
| 709 | CSCI | 370 | Software Engineering | GREENBERG, A | 29 | 1 | 8 | 0 | 5 | 9 | 0 | 1 | 0 | 1 | 0 | 0 | 4 | 3.340 |
| 710 | CSCI | 370 | Software Engineering | GREENBERG, A | 35 | 0 | 10 | 0 | 7 | 11 | 1 | 1 | 3 | 0 | 1 | 0 | 1 | 3.179 |
| 711 | CSCI | 370 | Software Engineering | GREENBERG, A | 30 | 1 | 5 | 1 | 5 | 6 | 1 | 5 | 4 | 0 | 0 | 1 | 1 | 2.910 |
| 712 | CSCI | 370 | Software Engineering | ABREU, A | 32 | 0 | 3 | 1 | 5 | 5 | 6 | 4 | 1 | 0 | 1 | 5 | 1 | 2.439 |
| 713 | CSCI | 370 | Software Engineering | ABREU, A | 21 | 0 | 1 | 0 | 2 | 2 | 0 | 2 | 1 | 3 | 5 | 1 | 4 | 1.959 |
| 714 | CSCI | 381 | VT: Special Topics in Comp Sci | PHILLIPS, T | 16 | 2 | 4 | 4 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 3 | 3.369 |
| 715 | CSCI | 381 | VT: Special Topics in Comp Sci | HELLER, P | 33 | 1 | 8 | 0 | 7 | 2 | 5 | 2 | 1 | 0 | 0 | 0 | 7 | 3.277 |
| 716 | CSCI | 381 | VT: Special Topics in Comp Sci | RODAY, R | 39 | 2 | 7 | 7 | 3 | 7 | 4 | 1 | 4 | 0 | 0 | 0 | 4 | 3.254 |
| 717 | CSCI | 381 | VT: Special Topics in Comp Sci | WAXMAN, J | 30 | 11 | 3 | 3 | 1 | 1 | 1 | 1 | 3 | 0 | 0 | 2 | 4 | 3.246 |
| 718 | CSCI | 381 | VT: Special Topics in Comp Sci | GOLDBERG, R | 32 | 0 | 5 | 3 | 4 | 7 | 2 | 3 | 3 | 0 | 0 | 0 | 5 | 3.096 |
| 719 | CSCI | 381 | VT: Special Topics in Comp Sci | GOLDBERG, R | 33 | 0 | 10 | 2 | 3 | 0 | 4 | 3 | 6 | 0 | 1 | 0 | 4 | 3.034 |
| 720 | CSCI | 381 | VT: Special Topics in Comp Sci | TSAI, C | 16 | 0 | 1 | 0 | 0 | 3 | 1 | 1 | 0 | 0 | 0 | 1 | 9 | 2.571 |
| 721 | CSCI | 381 | VT: Special Topics in Comp Sci | ROZOVSKAYA, A | 20 | 0 | 1 | 0 | 2 | 3 | 0 | 2 | 6 | 0 | 0 | 1 | 5 | 2.413 |
| 722 | CSCI | 381 | VT: Special Topics in Comp Sci | GRYAK, J | 29 | 0 | 1 | 1 | 0 | 4 | 4 | 0 | 3 | 0 | 0 | 3 | 13 | 2.281 |
| 723 | CSCI | 381 | VT: Special Topics in Comp Sci | ROZOVSKAYA, A | 29 | 0 | 1 | 1 | 5 | 1 | 3 | 1 | 3 | 0 | 0 | 5 | 9 | 2.180 |
| 724 | CSCI | 381 | VT: Special Topics in Comp Sci | BROWN, T | 27 | 0 | 0 | 0 | 2 | 2 | 4 | 1 | 12 | 0 | 0 | 3 | 3 | 2.071 |
unique(): This function returns an array of all unique values in the order that they appear in the original DataFrame or Series. It's useful when you want to see or use the actual unique values (https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.unique.html)
nunique(): This function returns an integer that represents the number of unique values. It's useful when you just want to know how many unique values exist, rather than what those unique values are (https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.nunique.html)
def calculate_average_gpas(df):
# Prepare a list to store the results
results = []
# GPA equivalents for each letter grade
letter_grades_to_gpa = {
"A+": 4.0,
"A": 4.0,
"A-": 3.7,
"B+": 3.3,
"B": 3.0,
"B-": 2.7,
"C+": 2.3,
"C": 2.0,
"C-": 1.7,
"D": 1.0,
"F": 0.0
}
# Loop over all unique course numbers
for class_nbr in df["NBR"].unique():
# Filter the DataFrame for the current course number
df_nbr = df[df["NBR"] == class_nbr]
# Loop over all unique professors for the current course number
for prof in df_nbr["PROF"].unique():
# Filter the DataFrame for the current professor
df_prof = df_nbr[df_nbr["PROF"] == prof]
# Calculate the average GPA for the current professor and course number
avg_gpa_prof = round(df_prof["AVG GPA"].mean(), 1)
# Convert the individual grade counts to GPA equivalents and calculate the standard deviation
gpa_distributions = []
for grade_letter, gpa in letter_grades_to_gpa.items():
gpa_distributions.extend([gpa] * df_prof[grade_letter].sum())
std_dev_gpa_prof = round(np.std(gpa_distributions), 2)
# Append the result to the list
results.append({
"CLASS NUMBER": class_nbr,
"PROF": prof,
"AVG GPA PROF": avg_gpa_prof,
"AVG GPA PROF LETTER": gpa_letter_converter(avg_gpa_prof),
"STD DEV GPA PROF": std_dev_gpa_prof
})
# If there is more than one professor for this course, calculate the average GPA for the current course number, regardless of the professor
if df_nbr["PROF"].nunique() > 1:
avg_gpa_nbr = round(df_nbr["AVG GPA"].mean(), 1)
# Convert the individual grade counts to GPA equivalents and calculate the standard deviation
gpa_distributions = []
for grade_letter, gpa in letter_grades_to_gpa.items():
gpa_distributions.extend([gpa] * df_nbr[grade_letter].sum())
std_dev_gpa_nbr = round(np.std(gpa_distributions), 2)
# Append the result to the list
results.append({
"CLASS NUMBER": class_nbr,
"PROF": "All Professors",
"AVG GPA PROF": avg_gpa_nbr,
"AVG GPA PROF LETTER": gpa_letter_converter(avg_gpa_nbr),
"STD DEV GPA PROF": std_dev_gpa_nbr
})
# Convert the list of results to a DataFrame
df_results = pd.DataFrame(results)
# Find the hardest class based on average GPA
hardest_class = df_results[df_results["PROF"] == "All Professors"].sort_values("AVG GPA PROF").iloc[0]
# Calculate the average GPA for the entire subject
avg_gpa_subject = round(df["AVG GPA"].mean(), 1)
print(f"Average GPA for this entire subject in Fall 2022 is: {avg_gpa_subject}, which is a {gpa_letter_converter(avg_gpa_subject)}")
print(f"The hardest class based on average GPA in Fall 2022 is class number: {hardest_class['CLASS NUMBER']} with an average GPA of {hardest_class['AVG GPA PROF']}, which is a {hardest_class['AVG GPA PROF LETTER']}")
print("\nStandard deviation tells us about the spread of the grades that students received in each class. A higher standard deviation indicates a wider range of grades, while a lower standard deviation indicates that grades were more closely clustered around the average.")
return df_results
cs_df_results = calculate_average_gpas(cs_df)
cs_df_results
Average GPA for this entire subject in Fall 2022 is: 2.5, which is a C+ The hardest class based on average GPA in Fall 2022 is class number: 211 with an average GPA of 1.7, which is a C- Standard deviation tells us about the spread of the grades that students received in each class. A higher standard deviation indicates a wider range of grades, while a lower standard deviation indicates that grades were more closely clustered around the average.
| CLASS NUMBER | PROF | AVG GPA PROF | AVG GPA PROF LETTER | STD DEV GPA PROF | |
|---|---|---|---|---|---|
| 0 | 12 | WAXMAN, J | 2.6 | C+ | 1.13 |
| 1 | 48 | FRIED, M | 3.4 | B+ | 0.62 |
| 2 | 48 | JAGDEO, M | 2.9 | B- | 1.25 |
| 3 | 48 | All Professors | 3.1 | B | 0.95 |
| 4 | 100 | CASTRO, R | 2.9 | B- | 1.14 |
| 5 | 111 | SCHLEY, R | 2.5 | C+ | 1.30 |
| 6 | 111 | CHYN, X | 2.3 | C+ | 1.34 |
| 7 | 111 | All Professors | 2.3 | C+ | 1.33 |
| 8 | 112 | CONNOR, T | 3.7 | A- | 0.44 |
| 9 | 211 | WAXMAN, J | 1.9 | C- | 1.47 |
| 10 | 211 | ALAYEV, Y | 1.4 | D | 1.50 |
| 11 | 211 | All Professors | 1.7 | C- | 1.47 |
| 12 | 212 | TSE, C | 2.7 | B- | 1.25 |
| 13 | 212 | LORD, K | 2.2 | C | 1.40 |
| 14 | 212 | All Professors | 2.3 | C+ | 1.39 |
| 15 | 220 | KAHROBAEI, D | 3.1 | B | 1.09 |
| 16 | 220 | CHYN, X | 2.3 | C+ | 1.31 |
| 17 | 220 | CESARETTI, P | 2.1 | C | 1.52 |
| 18 | 220 | All Professors | 2.6 | C+ | 1.32 |
| 19 | 240 | YEH, J | 2.8 | B- | 1.01 |
| 20 | 240 | SVITAK, J | 2.3 | C+ | 1.26 |
| 21 | 240 | All Professors | 2.5 | C+ | 1.14 |
| 22 | 313 | TAO, X | 2.9 | B- | 1.21 |
| 23 | 313 | TSAI, C | 2.3 | C+ | 1.52 |
| 24 | 313 | STEINBERG, O | 2.2 | C | 1.17 |
| 25 | 313 | SMITH-THOMPSON, A | 2.1 | C | 1.42 |
| 26 | 313 | All Professors | 2.4 | C+ | 1.30 |
| 27 | 316 | SMITH-THOMPSON, A | 2.8 | B- | 0.63 |
| 28 | 316 | KONG, T | 2.1 | C | 1.54 |
| 29 | 316 | SVITAK, J | 1.7 | C- | 1.01 |
| 30 | 316 | All Professors | 2.1 | C | 1.35 |
| 31 | 320 | OBRENIC, B | 2.2 | C | 1.34 |
| 32 | 320 | BOKLAN, K | 1.8 | C- | 1.58 |
| 33 | 320 | All Professors | 2.0 | C | 1.36 |
| 34 | 323 | TAO, X | 3.6 | B+ | 0.57 |
| 35 | 323 | PHILLIPS, T | 2.6 | C+ | 1.41 |
| 36 | 323 | BROWN, T | 2.2 | C | 1.12 |
| 37 | 323 | GOSWAMI, M | 2.1 | C | 1.49 |
| 38 | 323 | All Professors | 2.6 | C+ | 1.36 |
| 39 | 331 | STEINBERG, O | 2.9 | B- | 0.81 |
| 40 | 331 | HELLER, P | 2.4 | C+ | 0.99 |
| 41 | 331 | LEAVITT, D | 2.3 | C+ | 1.46 |
| 42 | 331 | OBRENIC, B | 2.0 | C | 1.12 |
| 43 | 331 | All Professors | 2.5 | C+ | 1.16 |
| 44 | 334 | SY, B | 3.1 | B | 0.70 |
| 45 | 335 | GOLDBERG, R | 2.9 | B- | 0.87 |
| 46 | 340 | SVADLENKA, J | 2.7 | B- | 1.09 |
| 47 | 340 | FLUTURE, S | 2.4 | C+ | 0.98 |
| 48 | 340 | All Professors | 2.6 | C+ | 1.05 |
| 49 | 343 | RAHMAN, M | 3.0 | B | 0.77 |
| 50 | 343 | FLUTURE, S | 2.7 | B- | 1.34 |
| 51 | 343 | SMITH-THOMPSON, A | 2.4 | C+ | 1.55 |
| 52 | 343 | UPADHYAY, V | 2.0 | C | 1.23 |
| 53 | 343 | All Professors | 2.5 | C+ | 1.35 |
| 54 | 344 | FLUTURE, S | 2.5 | C+ | 1.04 |
| 55 | 348 | RAHMAN, M | 2.9 | B- | 0.86 |
| 56 | 355 | FRIED, M | 3.2 | B | 0.53 |
| 57 | 355 | LAW, R | 2.3 | C+ | 1.53 |
| 58 | 355 | All Professors | 2.5 | C+ | 1.30 |
| 59 | 370 | GREENBERG, A | 3.1 | B | 0.76 |
| 60 | 370 | ABREU, A | 2.2 | C | 1.19 |
| 61 | 370 | All Professors | 2.8 | B- | 1.02 |
| 62 | 381 | PHILLIPS, T | 3.4 | B+ | 1.10 |
| 63 | 381 | HELLER, P | 3.3 | B+ | 0.62 |
| 64 | 381 | RODAY, R | 3.3 | B+ | 0.66 |
| 65 | 381 | WAXMAN, J | 3.2 | B | 1.17 |
| 66 | 381 | GOLDBERG, R | 3.1 | B | 0.78 |
| 67 | 381 | TSAI, C | 2.6 | C+ | 1.15 |
| 68 | 381 | ROZOVSKAYA, A | 2.3 | C+ | 1.19 |
| 69 | 381 | GRYAK, J | 2.3 | C+ | 1.21 |
| 70 | 381 | BROWN, T | 2.1 | C | 0.90 |
| 71 | 381 | All Professors | 2.8 | B- | 1.06 |
import matplotlib.pyplot as plt
import seaborn as sns
plt.figure()
all_prof_df_results = cs_df_results[cs_df_results["PROF"] == 'All Professors']
sns.histplot(all_prof_df_results["STD DEV GPA PROF"], kde=True, color='skyblue')
plt.axvline(all_prof_df_results["STD DEV GPA PROF"].mean(), color='red', linestyle='dashed', linewidth=1, label='Mean')
min_ylim, max_ylim = plt.ylim()
plt.text(all_prof_df_results["STD DEV GPA PROF"].mean()*1.12, max_ylim*0.9, 'Mean: {:.2f}'.format(all_prof_df_results["STD DEV GPA PROF"].mean()))
# Generate a color palette with as many colors as classes
colors = sns.color_palette("husl", len(all_prof_df_results))
for idx, (_, row) in enumerate(all_prof_df_results.iterrows()):
plt.axvline(row["STD DEV GPA PROF"], color=colors[idx], linestyle='dotted', linewidth=0.5, label=row["CLASS NUMBER"])
plt.title("Distribution of Standard Deviations of GPAs")
plt.xlabel("Standard Deviation")
plt.ylabel("Frequency")
plt.legend(loc='upper right', bbox_to_anchor=(1.3, 1)) # Adjusted the location to ensure it doesn't overlap with the plot
plt.show()
def calculate_teacher_gpas(df):
# Prepare a list to store the results
results = []
# GPA equivalents for each letter grade
letter_grades_to_gpa = {
"A+": 4.0,
"A": 4.0,
"A-": 3.7,
"B+": 3.3,
"B": 3.0,
"B-": 2.7,
"C+": 2.3,
"C": 2.0,
"C-": 1.7,
"D": 1.0,
"F": 0.0
}
# Loop over all unique professors
for prof in sorted(df["PROF"].unique()):
# Filter the DataFrame for the current professor
df_prof = df[df["PROF"] == prof]
# Calculate the average GPA for the current professor
avg_gpa_prof = round(df_prof["AVG GPA"].mean(), 1)
# Convert the individual grade counts to GPA equivalents and calculate the standard deviation
gpa_distributions = []
for grade_letter, gpa in letter_grades_to_gpa.items():
gpa_distributions.extend([gpa] * df_prof[grade_letter].sum())
std_dev_gpa_prof = round(np.std(gpa_distributions), 1)
# Calculate the percentage of students who withdrew for the current professor
withdraw_percentage = df_prof["W"].sum() / df_prof["TOTAL"].sum()
# Append the result to the list
results.append({
"PROF": prof,
"AVG GPA PROF": avg_gpa_prof,
"AVG GPA PROF LETTER": gpa_letter_converter(avg_gpa_prof),
"STD DEV GPA PROF": std_dev_gpa_prof,
"NUM OF CLASSES": len(df_prof),
"WITHDRAW PERCENTAGE": round(withdraw_percentage * 100, 1)
})
# Convert the list of results to a DataFrame
df_results = pd.DataFrame(results)
# Calculate the average GPA for the entire subject
avg_gpa_subject = round(df["AVG GPA"].mean(), 1)
# Calculate the average standard deviation for the entire subject
gpa_distributions = []
for grade_letter, gpa in letter_grades_to_gpa.items():
gpa_distributions.extend([gpa] * df[grade_letter].sum())
avg_std_dev_subject = round(np.std(gpa_distributions), 1)
# Calculate the average withdrawal percentage for the entire subject
withdraw_percentage_subject = round((df["W"].sum() / df["TOTAL"].sum()) * 100, 2)
# Find the professors who teach more than one class, have a GPA that is the same or higher than the subject average GPA,
# and for whom less than 40% of students withdrew
best_profs = df_results[(df_results["NUM OF CLASSES"] > 1) & (df_results["AVG GPA PROF"] >= avg_gpa_subject) & (df_results["WITHDRAW PERCENTAGE"] <= withdraw_percentage_subject)]
print(f"Professors who teach more than one class, have a GPA that is the same or higher than the subject average GPA ({avg_gpa_subject}), and have a withdrawal percentage that is less or equal than the subject average withdrawal percentage ({withdraw_percentage_subject}%):\n")
print(best_profs, "\n")
print("Note: This ignores rate my professor ratings. This is soley based on GPA and it doesn't consider how much students actually learn from these teachers!")
print(f"Average standard deviation of GPA for all teachers in this subject in Fall 2022 is: {avg_std_dev_subject}")
return df_results
teacher_cs_df_results = calculate_teacher_gpas(cs_df)
teacher_cs_df_results
Professors who teach more than one class, have a GPA that is the same or higher than the subject average GPA (2.5), and have a withdrawal percentage that is less or equal than the subject average withdrawal percentage (22.26%):
PROF AVG GPA PROF AVG GPA PROF LETTER STD DEV GPA PROF \
9 FRIED, M 3.3 B+ 0.6
10 GOLDBERG, R 3.0 B 0.8
12 GREENBERG, A 3.1 B 0.8
16 KAHROBAEI, D 3.1 B 1.1
23 RAHMAN, M 3.0 B 0.8
28 STEINBERG, O 2.6 C+ 1.1
32 TAO, X 3.0 B 1.1
37 YEH, J 2.8 B- 1.0
NUM OF CLASSES WITHDRAW PERCENTAGE
9 2 8.0
10 3 14.3
12 3 6.4
16 2 9.9
23 2 1.7
28 4 6.5
32 4 10.1
37 3 14.1
Note: This ignores rate my professor ratings. This is soley based on GPA and it doesn't consider how much students actually learn from these teachers!
Average standard deviation of GPA for all teachers in this subject in Fall 2022 is: 1.3
| PROF | AVG GPA PROF | AVG GPA PROF LETTER | STD DEV GPA PROF | NUM OF CLASSES | WITHDRAW PERCENTAGE | |
|---|---|---|---|---|---|---|
| 0 | ABREU, A | 2.2 | C | 1.2 | 2 | 9.4 |
| 1 | ALAYEV, Y | 1.4 | D | 1.5 | 1 | 68.8 |
| 2 | BOKLAN, K | 1.8 | C- | 1.6 | 2 | 75.4 |
| 3 | BROWN, T | 2.1 | C | 1.0 | 2 | 8.8 |
| 4 | CASTRO, R | 2.9 | B- | 1.1 | 1 | 15.0 |
| 5 | CESARETTI, P | 2.1 | C | 1.5 | 1 | 23.3 |
| 6 | CHYN, X | 2.3 | C+ | 1.3 | 4 | 26.1 |
| 7 | CONNOR, T | 3.7 | A- | 0.4 | 1 | 7.7 |
| 8 | FLUTURE, S | 2.5 | C+ | 1.2 | 5 | 43.1 |
| 9 | FRIED, M | 3.3 | B+ | 0.6 | 2 | 8.0 |
| 10 | GOLDBERG, R | 3.0 | B | 0.8 | 3 | 14.3 |
| 11 | GOSWAMI, M | 2.1 | C | 1.5 | 1 | 24.6 |
| 12 | GREENBERG, A | 3.1 | B | 0.8 | 3 | 6.4 |
| 13 | GRYAK, J | 2.3 | C+ | 1.2 | 1 | 44.8 |
| 14 | HELLER, P | 2.8 | B- | 0.9 | 2 | 38.7 |
| 15 | JAGDEO, M | 2.9 | B- | 1.3 | 1 | 14.9 |
| 16 | KAHROBAEI, D | 3.1 | B | 1.1 | 2 | 9.9 |
| 17 | KONG, T | 2.1 | C | 1.5 | 3 | 20.6 |
| 18 | LAW, R | 2.3 | C+ | 1.5 | 3 | 26.6 |
| 19 | LEAVITT, D | 2.3 | C+ | 1.5 | 1 | 20.5 |
| 20 | LORD, K | 2.2 | C | 1.4 | 2 | 23.6 |
| 21 | OBRENIC, B | 2.1 | C | 1.3 | 3 | 14.4 |
| 22 | PHILLIPS, T | 2.8 | B- | 1.4 | 3 | 33.9 |
| 23 | RAHMAN, M | 3.0 | B | 0.8 | 2 | 1.7 |
| 24 | RODAY, R | 3.3 | B+ | 0.7 | 1 | 10.3 |
| 25 | ROZOVSKAYA, A | 2.3 | C+ | 1.2 | 2 | 28.6 |
| 26 | SCHLEY, R | 2.5 | C+ | 1.3 | 1 | 20.8 |
| 27 | SMITH-THOMPSON, A | 2.4 | C+ | 1.4 | 5 | 18.3 |
| 28 | STEINBERG, O | 2.6 | C+ | 1.1 | 4 | 6.5 |
| 29 | SVADLENKA, J | 2.7 | B- | 1.1 | 5 | 38.3 |
| 30 | SVITAK, J | 2.1 | C | 1.2 | 5 | 25.0 |
| 31 | SY, B | 3.1 | B | 0.7 | 1 | 46.2 |
| 32 | TAO, X | 3.0 | B | 1.1 | 4 | 10.1 |
| 33 | TSAI, C | 2.4 | C+ | 1.3 | 2 | 57.7 |
| 34 | TSE, C | 2.7 | B- | 1.2 | 1 | 30.6 |
| 35 | UPADHYAY, V | 2.0 | C | 1.2 | 2 | 31.2 |
| 36 | WAXMAN, J | 2.4 | C+ | 1.5 | 4 | 19.8 |
| 37 | YEH, J | 2.8 | B- | 1.0 | 3 | 14.1 |
import plotly.express as px
# Set the overall average GPA
mean_gpa = 2.5
# Create the scatter plot using Plotly Express
fig = px.scatter(teacher_cs_df_results,
x=list(range(len(teacher_cs_df_results))),
y='AVG GPA PROF',
hover_name='PROF', # This will show the professor's name when hovering over a point
title="CS Professor GPA Averages vs CS Subject Average GPA",
labels={'x': 'Professor (By Index Above)', 'y': 'Average GPA'},
size_max=100)
# Add a line for the average GPA
fig.add_shape(
type='line',
line=dict(dash='dash', color='red'),
x0=0,
x1=len(teacher_cs_df_results),
y0=mean_gpa,
y1=mean_gpa,
)
# Show the plot
fig.show()
# Count the number of classes with an average GPA at or greater than 3.0 and those less than 3.0
green_percentage = teacher_cs_df_results[teacher_cs_df_results['AVG GPA PROF'] >= 3.0].shape[0]
red_percentage = teacher_cs_df_results[teacher_cs_df_results['AVG GPA PROF'] < 3.0].shape[0]
# Create the values and labels for the pie chart
values = [green_percentage, red_percentage]
labels = ['At or above 3.0 (B or above)', 'At or below 2.7 (B- or below)']
# Define the colors for each section (green and red)
colors = ['#77dd77', '#ff6961']
# Plot the pie chart
plt.figure(figsize = (6, 6))
plt.pie(values, labels = labels, colors = colors, autopct = '%1.1f%%')
# Set the title
plt.title("Average GPA of all CS Teachers")
# Show the plot
plt.show()